* Session setup: empty the data in memory, turn off output paging, and
* make double the default storage type for variables created below.
clear
set more off
set type double

********************************************************************************
********************************************************************************
********************************************************************************
**** STATA Guide
**** Building the SDG economy: Needs, spending, and financing for universal achievement of the Sustainable Development Goals
**** Homi Kharas & John McArthur
**** Final data and do-files compiled by Selen Özdoğan
**** October 2019
********************************************************************************
**** 2. Estimate public spending needs to achieve the SDG economy in 2025
********************************************************************************
********************************************************************************
********************************************************************************


/*
	NAMING CONVENTIONS: 
	a_ = filled in missing values with interpolation
	_alt = applied 1.13x GDP/pc multiplier to later years
	l_ = natural log 
*/

********************************************************************************
**** Outline *******************************************************************
********************************************************************************
* 1. Agriculture
*		A . FAO/IFAD/WFP. 2015
*		B . Authors' calculations
* 2. Education
* 		A . The International Commission on Financing Global Education Opportunity. 2016.
* 3. Health
*		A . Stenberg et al. 2017
* 4. Social Spending
*		A . World Data Lab - World Poverty Clock
*		B . ILO - World Social Protection Report Data 2017-2019
* 5. Infrastructure
*		A . Rozenberg and Fay. 2019
* 6. WASH
*		A . Government Spending Watch - Spending Data
*		B . Hutton and Varughese. 2016
* 7. Conservation
*		A . McCarthy et al. 2012
* 8. Justice
*		A . Manuel et al. 2019
* 9. Merge all datasets together
*		A . Total and per capita country need
* 		B . 2025 needs by income group
* 		C . % of GDP country needs


********************************************************************************
********************************************************************************
*** 1. Agriculture
********************************************************************************
********************************************************************************

********************************************************************************
** A . FAO/IFAD/WFP. 2015
**		http://www.fao.org/3/a-i4951e.pdf - Adx. 2
**		Average annual economy wide additional rural investments in the Zhbotmea scenario, 2016-30
**		Additional public rural investment on improving infrastructure by region
**		2013 USD
********************************************************************************

* Load FAO/IFAD/WFP Annex 2 (sheet "Table 1"); the first spreadsheet row supplies variable names
import excel using "input\costing\FAO Hunger Annex 2 Table A2.xlsx", sheet("Table 1") firstrow clear

* Describe the source columns
label var PGTtotal "Poverty gap transfers, total average annual 2016-2030"
label var PGTrural "Poverty gap transfers, rural average annual 2016-2030"
label var inv_total "Additional investments, total average annual 2016-2030 (Const. 2013 USD)"
label var inv_rural "Additional investments, rural average annual 2016-2030 (Const. 2013 USD)"
label var addlinvest_pctgdp "Additional investments, % of GDP average annual 2016-2030"

* Harmonise identifiers: keep the FAO region under its own name, discard the
* sheet's income group (the 2015 income group is merged in below) and blank rows
rename (country region) (countryname FAO_region)
drop incomegroup
drop if missing(countryname)

* Standardise country names so they match the UN member-state list
CLEAN_COUNTRY_NAMES countryname

* Attach country codes; rows found only in the FAO sheet are discarded,
* member states without FAO data are retained
merge m:1 countryname using "output\UN_memberstates.dta", keepusing(countrycode) keep(match using) nogenerate

* Bring in 2015 income group and region, then exclude high income countries
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(incomegroup2015 region) nogenerate
drop if incomegroup2015 == "H"

* Rescale rural investment by 1,000,000 (presumably the source column is reported in millions of USD - TODO confirm against Annex 2)
* Rural infrastructure itself is removed further below using the Table 6 regional shares
replace inv_rural = inv_rural * 1000000

* Convert to USD 2015 constant
* The conversion file is joined through a constant key so its ratio is attached to every row
gen merge = 1
merge m:1 merge using "output\GDPratio_2013_to_2015.dta", keepusing(ratio_13_15)
drop _merge merge

gen inv_rural_k = inv_rural * ratio_13_15
 label variable inv_rural_k "Additional investments, rural average annual 2016-2030 (Const. 2015 USD)"

* Regional share of rural investment that goes to infrastructure (Table 6: infrastructure items / regional total)
gen rural_infrastructure = .
 label variable rural_infrastructure "% of rural investment that goes to infrastructure"
replace rural_infrastructure = (73 + 36) / 637 if region == "EAS"
replace rural_infrastructure = (115 + 67) / 699 if region == "LCN"
replace rural_infrastructure = (55 + 32) / 449 if region == "MEA"
replace rural_infrastructure = (3768 + 1884) / 19116 if region == "SAS"
replace rural_infrastructure = (29057 + 14537) / 117993 if region == "SSF"
 
 * Apply infrastructure as share of total to region ECS - no reported regional value
 * (.3573 is presumably 49,624 / 138,894, the all-region totals quoted below - TODO confirm)
replace rural_infrastructure = .3573 if region == "ECS"

* Remove rural infrastructure by applying the regional share to each country in that region
gen inv_exinf = inv_rural_k * (1 - rural_infrastructure)
 label variable inv_exinf "Additional investments excl. infrastructure, avg. annual 2016-2030 (Const. 2015 USD)"

* Use only public investment - Table 6 has share of public investment in rural (64.29%); need to remove infrastructure and recalculate
 * Remove infrastructure from total public and total, calculate share
  * Public = 89,294 - 43,006 (infrastructure) = 46,288
  * Total = 138,894 - 49,624 (infrastructure) = 89,270
  * Share of rural ag investment that is public = 46,288 / 89,270 = 51.85%

* Apply public investment share to each country
gen add_ag_need1530 = inv_exinf * .5185
 label variable add_ag_need1530 "Public additional investments excl. infrastructure, avg. annual 2016-2030 (Const. 2015 USD)" 

* Attach current agriculture spending per capita and population; drop rows found only in the totals file
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(pc_aggdp_2015 pc_aggdp_2030 pc_aggdp_1530 incpc_aggdp_2015 incpc_aggdp_2030 incpc_aggdp_1530 pop_avg1530 pop2025 pop2015 pop2030)
drop if _merge == 2
drop _merge 
save "output\costing\ag_public_rural.dta", replace

* Calculate per capita by country
use "output\costing\ag_public_rural.dta", clear

* Per capita - Assumes 2016-2030 average annual can be applied to 2025 population
* The same average annual amount is also divided by the 2015 and 2030 populations
gen pc_add_ag_need1530 = add_ag_need1530 / pop2025
 label variable pc_add_ag_need1530 "Per Capita public additional investments excl. infrastructure, avg. annual applied to 2025 (Const. 2015 USD)"
gen pc_add_ag_need2015 = add_ag_need1530 / pop2015
 label variable pc_add_ag_need2015 "Per Capita public additional investments excl. infrastructure, avg. annual applied to 2015 (Const. 2015 USD)"
gen pc_add_ag_need2030 = add_ag_need1530 / pop2030
 * Fixed: this label was previously attached to pc_add_ag_need2015, overwriting its label
 label variable pc_add_ag_need2030 "Per Capita public additional investments excl. infrastructure, avg. annual applied to 2030 (Const. 2015 USD)"

********************************************************************************
** B . Authors' calculations
**		Spending on agriculture in 2015
********************************************************************************

* Use income group per capita spending estimates if country-level spending observations are missing
* NOTE(review): only the 2015-2030 average is back-filled here; pc_aggdp_2015 and pc_aggdp_2030 are not - confirm this is intended
replace pc_aggdp_1530 = incpc_aggdp_1530 if pc_aggdp_1530 == .

* To match avg. annual need, use the average of 2015-2030 p.c. spending 
* Total per capita need = additional need + existing per capita agriculture spending
gen pc_ag_need1530 = pc_add_ag_need1530 + pc_aggdp_1530
 label variable pc_ag_need1530 "Per capita public investments in agriculture, avg. annual + spending (Const. 2015 USD)"
gen pc_ag_need2015 = pc_add_ag_need2015 + pc_aggdp_2015
 label variable pc_ag_need2015 "Per capita public investments in agriculture 2015, avg. annual + spending (Const. 2015 USD)"
gen pc_ag_need2030 = pc_add_ag_need2030 + pc_aggdp_2030
 label variable pc_ag_need2030 "Per capita public investments in agriculture 2030, avg. annual + spending (Const. 2015 USD)"

* Aggregate per capita need by income group for countries with country-level data (population-weighted mean)
bysort incomegroup2015: egen CLincpc_ag_need1530 = wtmean(pc_ag_need1530), weight(pop2025)
 label variable CLincpc_ag_need1530 "Total sample country per capita public investments in agriculture by income group, avg. annual applied to 2025 (Const. 2015 USD)"

bysort incomegroup2015: egen CLincpc_ag_need2015 = wtmean(pc_ag_need2015), weight(pop2015)
 * Fixed: this label was previously attached to CLincpc_ag_need1530
 label variable CLincpc_ag_need2015 "Total sample country per capita public investments in agriculture by income group, avg. annual applied to 2015 (Const. 2015 USD)"

bysort incomegroup2015: egen CLincpc_ag_need2030 = wtmean(pc_ag_need2030), weight(pop2030)
 label variable CLincpc_ag_need2030 "Total sample country per capita public investments in agriculture by income group, avg. annual applied to 2030 (Const. 2015 USD)"

* Fill missing observations for per capita need using income group aggregates and calculate need by country

* Per capita need using income group aggregates for missing
replace pc_ag_need1530 = CLincpc_ag_need1530 if pc_ag_need1530 == .
replace pc_ag_need2015 = CLincpc_ag_need2015 if pc_ag_need2015 == .
replace pc_ag_need2030 = CLincpc_ag_need2030 if pc_ag_need2030 == .

* Total agricultural need (the 2015-2030 average is scaled by 2025 population)
gen ag_need1530 = pc_ag_need1530 * pop2025
 label variable ag_need1530 "Public investments in agriculture, avg. annual + spending (Const. 2015 USD)"
gen ag_need2015 = pc_ag_need2015 * pop2015
 label variable ag_need2015 "Public investments in agriculture, avg. annual + spending 2015 (Const. 2015 USD)"
gen ag_need2030 = pc_ag_need2030 * pop2030
 label variable ag_need2030 "Public investments in agriculture, avg. annual + spending 2030 (Const. 2015 USD)"

* Recalculate aggregate per capita need by income group for all countries
drop CLincpc_ag_need1530
bysort incomegroup2015: egen incpc_ag_need1530 = wtmean(pc_ag_need1530), weight(pop2025)
 label variable incpc_ag_need1530 "Total per capita public investments in agriculture by income group, avg. annual + spending 2025 (Const. 2015 USD)"
bysort incomegroup2015: egen incpc_ag_need2030 = wtmean(pc_ag_need2030), weight(pop2030)
 label variable incpc_ag_need2030 "Total per capita public investments in agriculture by income group, avg. annual + spending 2030 (Const. 2015 USD)" 
 
* Total need by income group
* Fixed: labels previously said "Const. 2010 USD"; the summed inputs are in constant 2015 USD
bysort incomegroup2015: egen incag_need1530 = sum(ag_need1530) 
 label variable incag_need1530 "Total public investments in agriculture by income group, avg. annual + spending (Const. 2015 USD)"

bysort incomegroup2015: egen incag_need2030 = sum(ag_need2030) 
 label variable incag_need2030 "Total public investments in agriculture by income group 2030, avg. annual + spending (Const. 2015 USD)"

save "output\costing\ag_public_rural.dta", replace


********************************************************************************
********************************************************************************
*** 2. Education
********************************************************************************
********************************************************************************

********************************************************************************
** A . The International Commission on Financing Global Education Opportunity. 2016.
**		https://report.educationcommission.org/wp-content/uploads/2016/09/Learning_Generation_Full_Report.pdf - Table 3
** 		Costing and illustrative financing plan for the Learning Generation: Domestic public, avg. 2015-2030
**		Costing and illustrative financing plan for the Learning Generation: International finance of which DAC ODA, avg. 2015-2030
**		% of GDP
********************************************************************************

* The report uses 2014 WB income groups, see Classification (page 166)
* Start from the UN member-state list and attach both the 2014 and 2015 income groups
use "output\UN_memberstates.dta", clear
drop Region

merge 1:1 countrycode using "output\WBincomegroups_all.dta", keepusing(incomegroup2015 incomegroup2014)
drop if _merge == 2
drop _merge

merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(region pop2015 pop2025 pop2030 pop_avg1530)
drop _merge

* Fill in values from Table 3, Column avg. 2015-2030, Domestic Public and international finance of which DAC ODA
 * Create income group average population by summing up average populations of countries in each 2014 income group
egen inc_pop_15_30 = sum(pop_avg1530), by(incomegroup2014)
 label var inc_pop_15_30 "Population total, average 2015-2030 of countries according to 2014 inc group"
egen inc_pop_2015 = sum(pop2015), by(incomegroup2014)
 label var inc_pop_2015 "Population total, 2015 of countries according to 2014 inc group"
egen inc_pop_2030 = sum(pop2030), by(incomegroup2014)
 * Fixed: this label was previously attached to inc_pop_2015, overwriting its label
 label var inc_pop_2030 "Population total, 2030 of countries according to 2014 inc group"

* Create estimate of average annual education needs by income group (domestic public + DAC ODA, Table 3)
* Groups other than L, LM and UM are left missing
gen inc_educ_needs = .
replace inc_educ_needs = 28 + 15 if incomegroup2014 == "L"
replace inc_educ_needs = 430 + 7 if incomegroup2014 == "LM"
replace inc_educ_needs = 1311 + 3 if incomegroup2014 == "UM"

* Convert from billions and inflate from 2014 to 2015 USD
replace inc_educ_needs = inc_educ_needs * 1000000000 * 1.010688197560162
 label var inc_educ_needs "Sum total education cost by 2014 inc group"

* Create per capita estimate: the same group total divided by each population base
gen pc_educ_needs = inc_educ_needs/inc_pop_15_30
 label variable pc_educ_needs "Pc country-level educ public financing need using 2014 income groups, avg (Const. $ 2015)"
gen pc_educ_needs2015 = inc_educ_needs/inc_pop_2015
 label variable pc_educ_needs2015 "Pc country-level educ public financing need using 2014 income groups 2015, avg (Const. $ 2015)"
gen pc_educ_needs2030 = inc_educ_needs/inc_pop_2030
 label variable pc_educ_needs2030 "Pc country-level educ public financing need using 2014 income groups 2030, avg (Const. $ 2015)"
 
* Assign per capita estimate to four countries, Argentina, Equatorial Guinea, Russia, and Venezuela, who went from High income to Upper middle income in 2015, need to include them. Assign UM pc_educ_needs
* NOTE(review): the three values below are hard-coded; presumably they are the UM-group per capita figures computed above - re-check them if the inputs change
sort incomegroup2015 incomegroup2014
replace pc_educ_needs = 526.133728027343 if countrycode=="RUS" | countrycode=="VEN" | countrycode=="ARG" |countrycode=="GNQ"
replace pc_educ_needs2015 = 549.971289948226 if countrycode=="RUS" | countrycode=="VEN" | countrycode=="ARG" |countrycode=="GNQ"
replace pc_educ_needs2030 = 509.606391371983 if countrycode=="RUS" | countrycode=="VEN" | countrycode=="ARG" |countrycode=="GNQ"

* Calculate total financing needs by country (the 2015-2030 average is scaled by 2025 population)
gen educ_needs = pc_educ_needs * pop2025
 label variable educ_needs "Educ total country-level public financing need using 2014 income groups, avg (Const. $ 2015)"

gen educ_needs2015 = pc_educ_needs2015 * pop2015
 label variable educ_needs2015 "Educ total country-level public financing need 2015 using 2014 income groups, avg (Const. $ 2015)"
 
gen educ_needs2030 = pc_educ_needs2030 * pop2030
 label variable educ_needs2030 "Educ total country-level public financing need 2030 using 2014 income groups, avg (Const. $ 2015)"

* Total funding need by income group
bysort incomegroup2015: egen inceduc_needs = sum(educ_needs)
 label variable inceduc_needs "Educ public financing need by 2015 income group, avg (Const. $ 2015)"

bysort incomegroup2015: egen inceduc_needs2015 = sum(educ_needs2015)
 label variable inceduc_needs2015 "Educ public financing need 2015 by 2015 income group, avg (Const. $ 2015)"

bysort incomegroup2015: egen inceduc_needs2030 = sum(educ_needs2030)
 label variable inceduc_needs2030 "Educ public financing need 2030 by 2015 income group, avg (Const. $ 2015)"

* New per capita funding need by income group according to WB 2015 income groups (population-weighted mean)
* Fixed: the three labels below previously described these education variables as "justice"
bysort incomegroup2015: egen incpc_educ_needs = wtmean(pc_educ_needs), weight(pop2025)
 label variable incpc_educ_needs "Pc educ public financing need by 2015 income group, avg (Const. $ 2015)"
 
bysort incomegroup2015: egen incpc_educ_needs2015 = wtmean(pc_educ_needs2015), weight(pop2015)
 label variable incpc_educ_needs2015 "Pc educ public financing need 2015 by 2015 income group, avg (Const. $ 2015)"
 
bysort incomegroup2015: egen incpc_educ_needs2030 = wtmean(pc_educ_needs2030), weight(pop2030)
 label variable incpc_educ_needs2030 "Pc educ public financing need 2030 by 2015 income group, avg (Const. $ 2015)"
 
* Fix the string storage widths of the identifiers before saving
recast str3 countrycode
recast str30 countryname

save "output\costing\educ_needs_clean.dta", replace

********************************************************************************
********************************************************************************
*** 3. Health
********************************************************************************
********************************************************************************

********************************************************************************
** A . Stenberg et al. 2017 
**		https://www.thelancet.com/journals/langlo/article/PIIS2214-109X(17)30263-2/fulltext - Table 3
**		Estimated additional resource needs: Modeled general government health expenditure total cost per person 2030 (GGHE) - Ambitious Scenario
**		2014 USD
********************************************************************************

* Load the "Ambitious scenario" sheet; variable names come from the first row, lower-cased
import excel using "input\costing\Lancet health systems.xlsx", sheet("Ambitious scenario") firstrow case(lower) clear

* Rows without a country group are tagged "all"
replace countrygroup = "all" if missing(countrygroup)
label var gghe_mean2030 "Health total public financing need per person, 2030 (Const. USD 2014)"
label var gghe_min2030 "Min health total public financing need per person, 2030 (Const. USD 2014)"
save "output\costing\healthsystems_needs.dta", replace

* Second extract: only the income-group rows (L, LM, UM) with their mean cost per person
keep if inlist(countrygroup, "L", "LM", "UM")
gen incomegroup2015 = countrygroup
keep incomegroup2015 gghe_mean2030

save "output\costing\healthsystems_needs_income.dta", replace

* GNI per capita PPP: download the 2015 values of World Bank indicator NY.GNP.PCAP.PP.CD
* (A preceding import/rename of the Lancet appendix sheet was removed: the clear option
*  of wbopendata replaced that data before it was ever used.)
wbopendata, indicator(NY.GNP.PCAP.PP.CD) nometadata year(2015) clear

* Drop aggregates and rows without a region; Nauru and Kosovo are given a region first so they are retained
drop if region == "Aggregates"
replace region = "EAP" if countryname == "Nauru"
replace region = "ECS" if countryname == "Kosovo"
drop if region == ""
keep countryname countrycode yr2015
rename yr2015 gniPPP2015

* Correlation between GNI per capita,PPP and GDP per capita, PPP is close to 1 (0.99), so fill in missing 2015 GDP per capita PPP from CIA World Factbook manually
* Divisors deflate later-year dollar figures back to 2015 dollars
replace gniPPP2015 = 12200/1.010942042942703 if countrycode == "CUB" // Convert from 2016 to 2015 dollars
replace gniPPP2015 = 3300/1.030157114162287 if countrycode == "DJI" // Convert from 2017 to 2015 dollars
replace gniPPP2015 = 1500/1.030157114162287 if countrycode == "ERI" // Convert from 2017 to 2015 dollars
replace gniPPP2015 = 1700 if countrycode == "PRK"
replace gniPPP2015 = 2900 if countrycode == "SYR"
replace gniPPP2015 = (19140000000/13797000)/1.010942042942703 if countrycode == "SOM" // Only GDP PPP available. Supplement with UN WPP Population info for 2015. Convert from 2016 to 2015 dollars
replace gniPPP2015 = 17300/1.030157114162287 if countrycode == "VEN" // Convert back from 2017 to 2015 dollars

save "output\costing\gnippp2015.dta", replace

* Supplement with WHO (2017) 
 * http://apps.who.int/gho/data/node.main.A1444?lang=en 
 * Downloaded on 2/28/2019
 * Physicians density (per 1000 population) and Nursing and midwifery personnel density (per 1000 population)
********************************************************************************
import excel "input\costing\WHO_GHO_healthcaredensity(28February2019).xlsx", sheet("data") firstrow case(lower) clear

* Keep the two density series and give them short names that become the wide-variable suffixes
keep if inlist(ghocode, "HRH_33", "HRH_26")
replace ghocode = "phys" if ghocode == "HRH_26"
replace ghocode = "nurse" if ghocode == "HRH_33"
keep ghocode yearcode countrycode countrydisplay numeric

* Reshape wide by type (creates numericnurse and numericphys per country-year)
* The year variable is spelled out as yearcode so this does not depend on variable-name abbreviation being enabled
reshape wide numeric, i(countrycode yearcode) j(ghocode, string)

* Calculate total
gen hrh = numericnurse + numericphys
 label variable hrh "Human resources for health: density of doctors, nurses and midwives per 1000 population"

* Identify most recent year with both values
* (the sum is missing whenever either component is missing, so those years are dropped first)
drop if hrh == .

bysort countrycode: egen maxyear = max(yearcode)
keep if yearcode == maxyear
rename countrydisplay countryname
keep countryname countrycode hrh

save "output\costing\WHO_HRH.dta", replace

* Supplement with WHO (2017) 
 * http://apps.who.int/gho/data/node.main.REPWOMEN39
 * Downloaded on 2/28/2019
 * Births attended by skilled health personnel (%)
********************************************************************************
* Load the WHO skilled-birth-attendance extract (variable names lower-cased from the first row)
import excel using "input\costing\WHO_GHO_skilledbirth(28February2019).xlsx", sheet("data") firstrow case(lower) clear

* Restrict to the skilled-birth-attendance indicator code
keep if ghocode == "MDG_0000000025"

* Only one year available for each country, so no year selection is needed
keep countrycode countrydisplay numeric
rename (numeric countrydisplay) (sba countryname)
label var sba "Births attended by skilled health personnel (%)"

save "output\costing\WHO_SBA.dta", replace

* Population affected by conflict
 * The paper does not make its source clear, so its own appendix table is used (limited country sample)
import excel using "input\costing\lancet health systems APDX.xlsx", sheet("Table 1") cellrange(A2:J69) firstrow case(lower) clear
keep country populationaffectedbyconflict
rename (country populationaffectedbyconflict) (countryname conflict)

* A dash marks "no value": turn it into Stata's missing marker, then convert the column to numeric
replace conflict = "." if conflict == "-"
destring conflict, replace

* Standardise country names so they match the UN member-state list
CLEAN_COUNTRY_NAMES countryname

* Attach country codes (all rows from both sides are retained)
merge m:1 countryname using "output\UN_memberstates.dta", keepusing(countrycode) nogenerate

save "output\costing\lancet_conflict.dta", replace

* Supplement with Fund for Peace - Fragile States Index 2015
 * http://fundforpeace.org/fsi/excel/
 * Downloaded on 2/28/2019
 * Fragility index = C1: Security Apparatus + E1: Economy + P2: Public Services + S1: Demographic Pressures + X1: External Intervention
********************************************************************************
* Load the 2015 sheet of the Fragile States Index workbook
import excel using "input\costing\fundforpeace_fragile_states_index(28February2019).xlsx", sheet("2015") firstrow case(lower) clear

rename country countryname
drop if missing(countryname)

* Build the five-component fragility score used for the country grouping (C1 + E1 + P2 + S1 + X1)
gen findex = c1securityapparatus + e1economy + p2publicservices + s1demographicpressures + x1externalintervention
label var findex "Fund for Peace fragility index 2015 (C1 + E1 + P2 + S1 + X1)"
keep countryname findex

* Standardise country names so they match the UN member-state list
CLEAN_COUNTRY_NAMES countryname

* Attach country codes; rows found only in the index workbook are discarded
merge m:1 countryname using "output\UN_memberstates.dta", keepusing(countrycode) keep(match using) nogenerate

save "output\costing\fundforpeace_index.dta", replace

* Merge datasets and use logic from Stenberg et al. Apdx 2 to identify country groupings
* Each rule below only fills countries that are still unclassified (countrygroup == ""),
* so the order of the rules sets their priority: C, then V, then HS3, HS2, HS1.

use "output\costing\fundforpeace_index.dta", clear
merge 1:1 countrycode using "output\costing\lancet_conflict.dta"
drop _merge
merge 1:1 countrycode using "output\costing\WHO_SBA.dta"
drop _merge
merge 1:1 countrycode using "output\costing\WHO_HRH.dta"
drop _merge
merge 1:1 countrycode using "output\costing\gnippp2015.dta"
drop _merge

* Drop non-UN member states (rows whose countryname is not in the member-state list)
merge m:1 countryname using "output\UN_memberstates.dta", keepusing(countrycode)
drop if _merge == 1
drop _merge

* Exclude high-income countries
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(incomegroup2015)
drop _merge
drop if incomegroup2015 == "H"

* Identify conflict states: > 10% population affected by conflict
gen countrygroup = "C" if conflict > 10 & conflict != .

* Identify vulnerable states: >= 43.5 on fragility index
replace countrygroup = "V" if findex >= 43.5 & findex != . & countrygroup == ""

* Identify HS3 states: GNI p.c. > 10,000 or GNI p.c. > 5,000 & SBA > 90%
replace countrygroup = "HS3" if gniPPP2015 > 10000 & gniPPP2015 != . & countrygroup == ""
replace countrygroup = "HS3" if gniPPP2015 > 5000 & gniPPP2015 != . & sba > 90 & sba != . & countrygroup == ""

* Identify HS2 states: GNI p.c. >= 2,500 or GNI p.c. < 2,500 & SBA > 90%
replace countrygroup = "HS2" if gniPPP2015 >= 2500 & gniPPP2015 != . & countrygroup == ""
replace countrygroup = "HS2" if gniPPP2015 < 2500 & sba > 90 & sba != . & countrygroup == ""

* Identify HS1 states: GNI p.c. < 2,500 & (HRH < 2.28 or SBA < 90%)
* Missing values compare as larger than any number, so a missing GNI, HRH or SBA never satisfies a "<" test;
* countries matching none of the rules keep an empty countrygroup
replace countrygroup = "HS1" if gniPPP2015 < 2500 & (hrh < 2.28 | sba < 90) & countrygroup == ""

keep countryname countrycode countrygroup incomegroup2015
save "output\costing\lancet_countrygroupclass.dta", replace

* Combine the country-group classification with the per-person cost of each group,
* then derive country totals and income-group aggregates

* Attach the modeled cost per person of each country's group; cost rows without any country are discarded
use "output\costing\lancet_countrygroupclass.dta", clear
merge m:1 countrygroup using "output\costing\healthsystems_needs.dta", keep(master match) nogenerate

keep countryname countrycode countrygroup gghe_mean2030 incomegroup2015

* Convert to 2015 constant USD: the ratio file is joined through a constant key
gen merge = 1
merge m:1 merge using "output\GDPratio_2014_to_2015.dta", keepusing(ratio_14_15) nogenerate
drop merge

gen gghe_mean2030_gdp2015 = gghe_mean2030 * ratio_14_15

* Calculate total needs by country
*** Note: Our estimates of total and per capita health needs in 2025 (labeled health_need2030 and pc_health_need2030) are based on Stenberg et al. (2017) estimates for 2026-2030

merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(pop2030 pop_avg2630 pop2015) keep(master match) nogenerate

* 2015 per-person figure: the 2030 figure rescaled by the ratio of 2030 to 2015 population
* NOTE(review): with this rescaling health_need2015 below is algebraically equal to health_need2030 - confirm intended
gen gghe_mean2015 = gghe_mean2030 * pop2030 / pop2015
gen gghe_mean2015_gdp2015 = gghe_mean2015 * ratio_14_15

gen health_need2030 = gghe_mean2030_gdp2015 * pop2030
label var health_need2030 "Mean Health public financing need using income groups, 2030 (Const. USD 2015)"
gen health_need2015 = gghe_mean2015_gdp2015 * pop2015
label var health_need2015 "Mean Health public financing need using income groups, 2015 (Const. USD 2015)"

* Per-capita need by country is the converted per-person cost itself
gen pc_health_need2030 = gghe_mean2030_gdp2015
label var pc_health_need2030 "Per capita mean health public financing need using income groups, 2030 (Const. USD 2015)"
gen pc_health_need2015 = gghe_mean2015_gdp2015
label var pc_health_need2015 "Per capita mean health public financing need using income groups, 2015 (Const. USD 2015)"

* Income-group totals
bysort incomegroup2015: egen inchealth_need2030 = sum(health_need2030)
label var inchealth_need2030 "Mean Health public financing need by income group, 2030 (Const. USD 2015)"

bysort incomegroup2015: egen inchealth_need2015 = sum(health_need2015)
label var inchealth_need2015 "Mean Health public financing need by income group, 2015 (Const. USD 2015)"

* Income-group per capita need (population-weighted mean)
bysort incomegroup2015: egen incpc_health_need2030 = wtmean(pc_health_need2030), weight(pop2030)
label var incpc_health_need2030 "Per capita mean health public financing need by income group, 2030 (Const. USD 2015)"

bysort incomegroup2015: egen incpc_health_need2015 = wtmean(pc_health_need2015), weight(pop2015)
label var incpc_health_need2015 "Per capita mean health public financing need by income group, 2015 (Const. USD 2015)"

save "output\costing\health_needs_clean.dta", replace

********************************************************************************
********************************************************************************
*** 4. Social Spending
********************************************************************************
********************************************************************************

********************************************************************************
** A . World Data Lab - World Poverty Clock
**		https://worldpoverty.io/
**  	Downloaded on 3/14/2019
**		Poverty gap per capita, under $1.90 dollars a day
**  	2011 PPP
********************************************************************************

* Load the poverty gap series and keep observations from 2015 onward
use "input\costing\povertygap.dta", clear
rename iso3c countrycode
keep if year >= 2015 & !missing(year)

* A zero poverty headcount rate is taken to mean a zero poverty gap
replace gap_tot = 0 if hcr_poverty == 0
keep countryname countrycode year gap_tot

* Attach the PPP 2011 -> constant 2015 USD ratio; ratio rows without poverty data are discarded
merge m:1 countrycode using "output\GDPratio_PPP2011_to_2015.dta", keepusing(countryname ratio_ppp2011_k2015) keep(master match) nogenerate

* Poverty gap expressed in constant 2015 USD (missing where no ratio is available)
gen pgtSS_need = gap_tot * ratio_ppp2011_k2015

* Convert from PPP 2011 to 2015 USD constant using PPP conversion factor, exchange rates and GDP deflator
* The poverty gap data are set aside with preserve while the conversion inputs are built, then restored
preserve

* Import indicators
wbopendata, indicator(PA.NUS.PPP; NY.GDP.MKTP.CN; NY.GDP.MKTP.CD) long nometadata clear
drop if inlist(region, "Aggregates", "") & countrycode!="GIB" & countrycode!="XKX" & countrycode!="NRU" & countrycode!="VGB"
label var pa_nus_ppp "PPP conversion factor, GDP (LCU per international $)"

* Create market exchange rates (GDP in current USD divided by GDP in current LCU)
gen mkt_ex = ny_gdp_mktp_cd/ny_gdp_mktp_cn
label var mkt_ex "Market Exchange rates dollars per LCU"

* Check latest year at or near 2011 (diagnostic tabulation only; mx_year is not used afterwards)
egen mx_year = max(cond(mkt_ex!=., year, .)) if year<=2011, by(countrycode)
tab mx_year if inlist(countryname, "Cuba", "Djibouti", "Eritrea", "Somalia", "Venezuela, RB"), missing

* Somalia first exchange rate in 2013, so make sure to keep that year for Somalia
keep if year == 2011 | (year == 2013 & countrycode == "SOM")
collapse (mean) pa_nus_ppp mkt_ex, by(countryname countrycode)

* Save under the exact tempfile name, in quotes: appending ".dta" wrote a different file that
* Stata's automatic temp-file cleanup does not track, and an unquoted path breaks if it contains spaces
tempfile ex_rate
save "`ex_rate'", replace
restore

merge m:1 countrycode using "`ex_rate'"
keep if _merge==3 // Taiwan, British Virgin Islands, Nauru, Gibraltar not merging, fine since they're not UN states.
drop _merge

* Compute alternate constant 2015 USD estimate of Poverty gap total value from constant 2011 PPP $.
 * Multiply by ratio of LCU to international $, then multiply by exrate dollar per LCU, then inflate from 2011 to 2015 using GDP deflator for US.
 * Only applied where the ratio-based estimate is missing but the poverty gap itself is available
replace pgtSS_need = gap_tot * pa_nus_ppp * mkt_ex * 1.067979899891271 if pgtSS_need == . & gap_tot !=.

* One row per country with one column per year (pgtSS_need2015, pgtSS_need2025, pgtSS_need2030, ...)
keep countryname countrycode year pgtSS_need
reshape wide pgtSS_need, i(countrycode) j(year)
 * Fixed: the 2025 and 2030 labels previously said "USD const. 2030"; the values are in constant 2015 USD
 label variable pgtSS_need2015 "Poverty gap transfer required, 2015 (USD const. 2015)"
 label variable pgtSS_need2025 "Poverty gap transfer required, 2025 (USD const. 2015)"
 label variable pgtSS_need2030 "Poverty gap transfer required, 2030 (USD const. 2015)"

* Keep UN member states (rows absent from the member-state list are dropped)
merge m:1 countrycode using "output\UN_memberstates.dta", keepusing(countryname)
drop if _merge==1
drop _merge 

********************************************************************************
** B . ILO - World Social Protection Report Data 2017-2019
** 		http://www.ilo.org/gimi/gess/AggregateIndicator.action#expenditure
** 		Downloaded on 1/29/2019
**		Public social protection expenditure, excluding health, latest available year 
**		% of GDP
********************************************************************************

* Attach population, income group and social spending per capita; rows found only in the totals file are discarded
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(countryname pop2015 pop2025 pop2030 incomegroup2015 region pop_avg1530 pop_avg1525 pc_SSgdp_1530 pc_SSgdp_1525 pc_SSgdp_2015 pc_SSgdp_2025 pc_SSgdp_2030 a_pc_SSgdp_2015 a_pc_SSgdp_2025 a_pc_SSgdp_2030) keep(master match) nogenerate

* Per capita need = poverty gap transfer per person + a_ social spending per capita, for each year
gen pc_pgtSS_need2015 = pgtSS_need2015 / pop2015 + a_pc_SSgdp_2015
label var pc_pgtSS_need2015 "Per capita poverty gap transfer required + current social spending, 2015 (USD const. 2015)"
gen pc_pgtSS_need2025 = pgtSS_need2025 / pop2025 + a_pc_SSgdp_2025
label var pc_pgtSS_need2025 "Per capita poverty gap transfer required + current social spending, 2025 (USD const. 2015)"
gen pc_pgtSS_need2030 = pgtSS_need2030 / pop2030 + a_pc_SSgdp_2030
label var pc_pgtSS_need2030 "Per capita poverty gap transfer required + current social spending, 2030 (USD const. 2015)"

* Income-group totals
* NOTE(review): these sums cover pgtSS_need only (the transfer), while the labels also mention current social spending - confirm which is intended
bysort incomegroup2015: egen incpgtSS_need2015 = sum(pgtSS_need2015)
label var incpgtSS_need2015 "Total need for poverty gap transfer by income group + current social spending, 2015 (USD const. 2015)"
bysort incomegroup2015: egen incpgtSS_need2025 = sum(pgtSS_need2025)
label var incpgtSS_need2025 "Total need for poverty gap transfer by income group + current social spending, 2025 (USD const. 2015)"
bysort incomegroup2015: egen incpgtSS_need2030 = sum(pgtSS_need2030)
label var incpgtSS_need2030 "Total need for poverty gap transfer by income group + current social spending, 2030 (USD const. 2015)"

* Income-group per capita need (population-weighted mean)
bysort incomegroup2015: egen incpc_pgtSS_need2015 = wtmean(pc_pgtSS_need2015), weight(pop2015)
label var incpc_pgtSS_need2015 "Per capita poverty gap transfer + current social spending need by income group, 2015 (Const. USD 2015)"
bysort incomegroup2015: egen incpc_pgtSS_need2025 = wtmean(pc_pgtSS_need2025), weight(pop2025)
label var incpc_pgtSS_need2025 "Per capita poverty gap transfer + current social spending need by income group, 2025 (Const. USD 2015)"
bysort incomegroup2015: egen incpc_pgtSS_need2030 = wtmean(pc_pgtSS_need2030), weight(pop2030)
label var incpc_pgtSS_need2030 "Per capita poverty gap transfer + current social spending need by income group, 2030 (Const. USD 2015)"

* High income countries are removed only after the aggregates have been computed
drop if incomegroup2015 == "H"

save "output\costing\pgtSS_needs_clean.dta", replace


********************************************************************************
********************************************************************************
*** 5. Infrastructure
********************************************************************************
********************************************************************************

********************************************************************************
** A . Rozenberg and Fay. 2019
**		https://www.worldbank.org/en/data/interactive/2019/02/19/data-table-infrastructure-investment-needs-in-low-and-middle-income-countries
**		Sectors: Energy, transportation, flood protection 
**		Infrastructure spending needs in low and middle income countries, capital and maintenance, 2015-2030 (Scenario: Preferred)
**		% of GDP
**		Public spending, 2011
**		% of total infrastructure investment
********************************************************************************

* Load the infrastructure spending-needs table and restrict it to the
* "Preferred" scenario rows
import excel "input\costing\Infra_spendingneeds(22February2019).xlsx", sheet("data") firstrow case(lower) clear
keep if scenario == "Preferred"

* Total % of GDP by region/sector: add up percentofgdp over all remaining rows
* of each region-sector pair (capital + maintenance per the section header),
* then convert from percent to a fraction
bysort region sector: egen tol_shgdp = sum(percentofgdp)
replace tol_shgdp = tol_shgdp / 100
label variable tol_shgdp "Share of GDP for infrastructure sector (capital + maintenance)"

* Collapse to one row per region/sector
keep region sector tol_shgdp
duplicates drop

* Public sector share (Table 4)
*  The regional central estimate is applied equally to every sector
*  (public spending as % of total infrastructure investment, 2011).
*  Regions not listed below keep a missing share.
gen public_sh = .98 if region == "East Asia and Pacific"
replace public_sh = .83 if region == "Eastern Europe and Central Asia"
replace public_sh = .75 if region == "Latin America and Caribbean"
replace public_sh = .94 if region == "Middle-East and North Africa"
replace public_sh = .62 if region == "South Asia"
replace public_sh = .75 if region == "Sub-Saharan Africa"
replace public_sh = .89 if region == "LMICs"

* Public financing need = total sector share of GDP x public share
gen pub_shgdp = tol_shgdp * public_sh
label variable pub_shgdp "Infrastructure public financing need by region/sector, annual avg. 2015-30 (% of GDP)"

* Remove water supply/sanitation and irrigation to limit overlap
* (the file has separate WASH and Agriculture sections)
drop if inlist(sector, "Water supply and sanitation", "Irrigation")

* Shorten the flood sector name so it can serve as a variable-name suffix,
* then go wide: one row per region, one pub_shgdp<Sector> column per sector
replace sector = "Flood" if sector == "Flood protection"
keep region sector pub_shgdp
reshape wide pub_shgdp, i(region) j(sector, string) 

* Calculate country total using share of GDP

* Recode region names to the three-letter codes used as the merge key below
* (the "LMICs" row is not recoded)
replace region = "EAS" if region == "East Asia and Pacific"
replace region = "ECS" if region == "Eastern Europe and Central Asia"
replace region = "LCN" if region == "Latin America and Caribbean"
replace region = "MEA" if region == "Middle-East and North Africa"
replace region = "SAS" if region == "South Asia"
replace region = "SSF" if region == "Sub-Saharan Africa"

* Attach the regional shares to every country of the region; region rows
* without any country are discarded, countries without a regional share are kept
merge 1:m region using "output\merged_full_totals_cons.dta", keepusing(countryname countrycode gdp_traj_avg1530 incomegroup2015 gdp_traj*) keep(match using) nogenerate

* Add 2015 and 2025 GDP trajectories; observations found only in the GDP file are discarded
merge 1:1 countrycode using "output\gdp_cons2030wide.dta", keepusing(gdp_traj2025 gdp_traj2015) keep(master match) nogenerate

* Keep only low and middle income countries as of 2015
drop if incomegroup2015 == "H" 

* Total needs by country
*  For each sector (energy, flood protection, transport):
*    need<year> = regional public financing share of GDP x country GDP in <year>
*  for 2025, 2015 and 2030 (variables are created in that order).
*  Fallback rule when the GDP trajectory declines:
*    - 2025 need uses 2015 GDP if gdp_traj2025 < gdp_traj2015
*    - 2030 need uses 2025 GDP if gdp_traj2030 < gdp_traj2025, and
*      2015 GDP if gdp_traj2030 < gdp_traj2015 (applied last, so it wins)
*  FIX: the final flood replacement used to overwrite flood_needs2025 instead
*  of flood_needs2030; all three sectors now follow the same rule.

* Share variable produced by the reshape, per sector
local shr_energy    pub_shgdpEnergy
local shr_flood     pub_shgdpFlood
local shr_transport pub_shgdpTransport

* Label prefix per sector (text kept exactly as in the existing labels)
local lbl_energy    "Energy"
local lbl_flood     "Flood protection"
local lbl_transport "Transportion"

foreach sec in energy flood transport {
	foreach yr in 2025 2015 2030 {
		gen `sec'_needs`yr' = `shr_`sec'' * gdp_traj`yr'
		label variable `sec'_needs`yr' "`lbl_`sec'' public financing need, `yr' (Const. USD 2015)"
	}

	* Fallbacks for declining GDP trajectories (order matters: the 2015 rule is last)
	replace `sec'_needs2025 = `shr_`sec''*gdp_traj2015 if gdp_traj2025<gdp_traj2015
	replace `sec'_needs2030 = `shr_`sec''*gdp_traj2025 if gdp_traj2030<gdp_traj2025
	replace `sec'_needs2030 = `shr_`sec''*gdp_traj2015 if gdp_traj2030<gdp_traj2015
}

* Income group aggregates
*  Sum of country needs within each 2015 income group, per year and sector
foreach yr in 2025 2015 2030 {
	foreach sec in energy flood transport {
		bysort incomegroup2015: egen inc`sec'_needs`yr' = sum(`sec'_needs`yr')
	}
}

* Calculate country per capita
*  Population for the three benchmark years; observations found only in the
*  population file are discarded
merge 1:1 countrycode using "output\pop_all_un62017_2030.dta", keepusing(pop2025 pop2015 pop2030) keep(master match) nogenerate

foreach yr in 2025 2015 2030 {
	foreach sec in energy flood transport {
		gen pc_`sec'_needs`yr' = `sec'_needs`yr' / pop`yr'
		label variable pc_`sec'_needs`yr' "Per capita public financing need, `yr' (Const. USD 2015)"
	}
}

* Calculate income group per capita
*  wtmean() of the country per capita needs, weighted by the same year's population
foreach yr in 2025 2015 2030 {
	foreach sec in energy flood transport {
		bysort incomegroup2015: egen incpc_`sec'_needs`yr' = wtmean(pc_`sec'_needs`yr'), weight(pop`yr')
	}
}

save "output\costing\infrastructure_needs_clean.dta", replace 
 

********************************************************************************
********************************************************************************
*** 6. WASH
********************************************************************************
********************************************************************************

********************************************************************************
** A . Government Spending Watch - Spending Data
**		http://www.governmentspendingwatch.org/spending-data
**		Downloaded on 1/31/2019
**		Planned government spending on water and sanitation
**		% of GDP
********************************************************************************

* Step 1: convert every Government Spending Watch workbook to .dta
*  - the raw "WASH" sheet (cells A2:K57) is saved as input/<file>.dta
*  - the "% GDP" rows for "Water & Sanitation expenditure" are saved as
*    output/WASH<file>.dta, with the workbook file name stored in countryname
local files: dir "input\GovSpendingWatch\" files "*.xlsx", respectcase
foreach wb in `files'{
	import excel using "input/GovSpendingWatch/`wb'", sheet("WASH") cellrange(A2:K57) firstrow clear 
	save "input/`wb'.dta", replace 

	keep if DataQuestion == "% GDP" & A == "Water & Sanitation expenditure"
	gen countryname = "`wb'"

	* Turn "No Data" cells into missing and convert the columns to numeric.
	* capture: both commands fail on columns that are already numeric, which is fine.
	foreach col of varlist Planned-K{
		capture replace `col' = "." if `col' == "No Data"
		capture destring `col', replace
		}
	save "output/WASH`wb'.dta", replace 
	} 

* Step 2: stack all country files.
*  Afghanistan is loaded first and then appended again by the loop, hence the
*  duplicates drop further down.
*  NOTE(review): this dir call omits respectcase, unlike the one above - confirm
*  this is intended.
use "output/WASHAfghanistan.xlsx.dta", clear
local files: dir "input\GovSpendingWatch\" files "*.xlsx"
foreach wb in `files'{
	append using "output/WASH`wb'.dta", force
	} 

save "output\WASHcombined.dta", replace	

use "output\WASHcombined.dta", clear
duplicates drop // Afghanistan

* Keep planned expenditure column (best coverage)
*  Columns Planned, F and I hold the 2014/15, 2015/16 and 2016/17 values
keep A DataQuestion Planned F I	countryname
rename Planned plan2015
 label variable plan2015 "2014/15 (2015) % of GDP"
rename F plan2016
 label variable plan2016 "2015/16 (2016) % of GDP"
rename I plan2017
 label variable plan2017 "2016/17 (2017) % of GDP"

* 3-year Average
*  Variables are listed explicitly so the result does not depend on column
*  order; rowmean() averages over the non-missing years only.
*  Then convert from percent to a fraction of GDP.
egen wash_3yma_2015 = rowmean(plan2015 plan2016 plan2017)
replace wash_3yma_2015 = wash_3yma_2015 / 100
 label variable wash_3yma_2015 "3-year average government spending on WASH % of GDP"

* countryname still holds the workbook file name: strip the trailing ".xlsx" (5 characters)
replace countryname = substr(countryname, 1, strlen(countryname) - 5)

* Clean country names to add countrycodes
*  (CLEAN_COUNTRY_NAMES is defined outside this section)
CLEAN_COUNTRY_NAMES countryname

* Add countrycodes; only names that match a UN member state are kept
merge 1:1 countryname using "output\UN_memberstates.dta", keepusing(countrycode)
keep if _merge==3
drop _merge 

keep countryname countrycode wash_3yma_2015

* Calculate income group gdp-weighted average current spending as share of GDP
*  All observations from both sides are kept, so countries without spending
*  data stay in the file with a missing wash_3yma_2015
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(countryname pop2015 pop2025 pop2030 incomegroup2015)
drop _merge

merge 1:1 countrycode using "output\gdp_cons2030wide.dta", keepusing(gdp_traj*)
drop _merge

drop if incomegroup2015 == "H"

bysort incomegroup2015: egen inc_wash_3yma_2015 = wtmean(wash_3yma_2015), weight(gdp_traj2015)
 label variable inc_wash_3yma_2015 "Current spending on WASH GDP-weighted average by income group, 2015 (% GDP)"
 
* Use income group weighted averages to replace missing country observations
gen wash_gdp =  wash_3yma_2015
replace wash_gdp = inc_wash_3yma_2015 if wash_3yma_2015 == .
 label variable wash_gdp "Current spending on WASH, 2015 (% GDP)"
 
* Calculate total current spending on WASH by country and average 2015-2030
*  The 2015 share of GDP is held constant and applied to each year's GDP.
*  forvalues replaces the out-of-date "for num" prefix; same variables, same order.
*  NOTE(review): the labels below describe these as income-group averages, but
*  the values are country-level amounts - confirm the intended wording.
forvalues yr = 2015/2030 {
	gen wash_gdp`yr' = wash_gdp * gdp_traj`yr'
}
 label variable wash_gdp2015 "Current spending on WASH GDP-weighted average by income group, 2015 (Const. 2015 USD)"
 label variable wash_gdp2025 "Current spending on WASH GDP-weighted average by income group, 2025 (Const. 2015 USD)"
 label variable wash_gdp2030 "Current spending on WASH GDP-weighted average by income group, 2030 (Const. 2015 USD)"

* wash_gdp2015 ... wash_gdp2030 were created consecutively just above, so the
* range covers exactly the 16 yearly variables
egen wash_gdp1530 = rowmean(wash_gdp2015-wash_gdp2030)
 label variable wash_gdp1530 "Current spending on WASH GDP-weighted average by income group, avg. 2015-2030 (Const. 2015 USD)"

keep countryname countrycode wash_gdp2015 wash_gdp2025 wash_gdp2030 wash_gdp1530

save "output\costing\WASH_currentspending_est.dta", replace 
 
********************************************************************************
** B . Hutton and Varughese. 2016
**	http://www.worldbank.org/en/topic/water/publication/the-costs-of-meeting-the-2030-sustainable-development-goal-targets-on-drinking-water-sanitation-and-hygiene 
**		Annual costs of basic water, sanitation and hygiene (capital, capital maintenance and operations) Cost Breakdown by Capital, Capital Maintenance and Operations
**		2015 USD
**		Note: Take only 2025 point estimates, not annual avg. Columns BOO, CHY, CWM in Calculations sheet.
********************************************************************************

* 2025 incremental cost: water + sanitation + hygiene (rowtotal() counts a
* missing component as zero), then scaled by 1,000
* NOTE(review): the x1000 presumably converts thousands of USD to USD - confirm against the workbook
import excel "input\costing\World Bank WASH_trim2025.xlsx", sheet("Basic WASH") firstrow clear

egen wash_incr_need2025 = rowtotal(BasicWater2025 BasicSanitation2025 BasicHygenie2025)
replace wash_incr_need2025 = wash_incr_need2025 * 1000
 label variable wash_incr_need2025 "Incremental cost of extending basic WASH to unserved population, 2025 (Const. 2015 USD)"
keep countryname wash_incr_need2025

* Same calculation from the second workbook (columns dated 2029, stored as the
* 2030 need); the 2025 data are set aside with preserve and brought back with restore
preserve
import excel "input\costing\World Bank WASH_trim2029.xlsx", sheet("BASIC WASH-2030") firstrow clear

egen wash_incr_need2030 = rowtotal(BasicWater2029 BasicSanitation2029 BasicHygenie2029)
replace wash_incr_need2030 = wash_incr_need2030 * 1000
 label variable wash_incr_need2030 "Incremental cost of extending basic WASH to unserved population, 2030 (Const. 2015 USD)"
keep countryname wash_incr_need2030
save "output\costing\WASH_2029.dta", replace 
restore

* Combine the 2025 and 2030 incremental costs (all observations kept)
merge 1:1 countryname using "output\costing\WASH_2029.dta"

* Clean country names to add countrycodes
CLEAN_COUNTRY_NAMES countryname

* Add countrycodes; only names that match a UN member state are kept
drop _merge
merge 1:1 countryname using "output\UN_memberstates.dta", keepusing(countryname countrycode) keep(match) nogenerate

* Add country-level spending on WASH (48 countries have country-level data: L=20, LM=19, UM=9)
 * Income group GDP-weighted average (2015 GDP) used to fill missing observations
 * Only countries present in both files are kept

merge 1:1 countrycode using "output\costing\WASH_currentspending_est.dta", keep(match) nogenerate

* Total WASH need = incremental cost of extending basic WASH + current spending
foreach yr in 2025 2030 {
	gen wash_need`yr' = wash_incr_need`yr' + wash_gdp`yr'
	label variable wash_need`yr' "Cost of basic WASH need, `yr' (Const. 2015 USD)"
}

* Calculate per capita by country
*  Observations found only in the population file are discarded
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(pop2025 pop2015 pop2030) keep(master match) nogenerate

* 2015 has no incremental cost estimate of its own: the 2025 incremental cost
* per person is applied to the 2015 population
gen wash_incr_need2015 = (wash_incr_need2025/pop2025)*pop2015
gen wash_need2015 = wash_incr_need2015+wash_gdp2015

foreach yr in 2025 2015 2030 {
	gen pc_wash_need`yr' = wash_need`yr' / pop`yr'
	label variable pc_wash_need`yr' "Per capita cost of basic WASH need, `yr' (Const. 2015 USD)"
}

* Calculate income group per capita
*  Bring in the 2015 income group and drop high-income countries first
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(countryname incomegroup2015) keep(master match) nogenerate
drop if incomegroup2015 == "H"

foreach yr in 2025 2015 2030 {
	bysort incomegroup2015: egen incpc_wash_need`yr' = wtmean(pc_wash_need`yr'), weight(pop`yr')
	label variable incpc_wash_need`yr' "Per capita cost of basic WASH need by income group, `yr' (Const. 2015 USD)"
}

* Calculate income group total
foreach yr in 2025 2015 2030 {
	bysort incomegroup2015: egen incwash_need`yr' = sum(wash_need`yr') 
	label variable incwash_need`yr' "Total cost of basic WASH need by income group, `yr' (Const. 2015 USD)"
}

save "output\costing\WASH_needs_clean.dta", replace 
 
********************************************************************************
********************************************************************************
*** 7. Conservation
********************************************************************************
********************************************************************************

********************************************************************************
** A . McCarthy et al. 2012
**		https://science.sciencemag.org/content/338/6109/946 - Table 1
**		Total investment required by income group
**		2012 USD
**		Note: These numbers have been multiplied by 1.17 to inflate their value to the $76.1 billion global number that is mentioned on page 948
********************************************************************************
 
* Start from the list of UN member states
use "output\UN_memberstates.dta", clear
drop Region

* Add 2015 income group, region and population (all observations kept)
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(incomegroup2015 region pop2015 pop2025 pop2030) nogenerate

* Add the 2011 income group, which is the grouping used for the cost figures
* below; observations found only in the income group file are discarded
merge 1:1 countrycode using "output\WBincomegroups_all.dta", keepusing(incomegroup2011) keep(master match) nogenerate

* Total population of each 2011 income group in 2025 (inc_pop), 2015 and 2030
egen inc_pop = sum(pop2025), by(incomegroup2011)
foreach yr in 2015 2030 {
	egen inc_pop_`yr' = sum(pop`yr'), by(incomegroup2011)
}

* Create per capita estimate of annual biodiversity needs by income group, also inflate from 2012 to 2015 USD
*  per capita need = group total x inflation factor x 1e9 / group population,
*  computed with the 2025, 2015 and 2030 group populations in turn.
*  The 2025 variables carry no year suffix.

* Total investment required by 2011 income group, before the 2012 -> 2015
* inflation adjustment
* NOTE(review): the x1000000000 presumably converts billions of USD to USD - confirm against Table 1
local need_H  33.78325652841782
local need_UM 19.75560675883257
local need_LM 16.36559139784946
local need_L  6.066958525345622
* Factor applied to inflate 2012 USD to 2015 USD
local infl 1.047882207079493

foreach yr in 2025 2015 2030 {
	if `yr' == 2025 {
		local pcvar  pc_biodiversity_needs
		local popvar inc_pop
	}
	else {
		local pcvar  pc_biodiversity_needs`yr'
		local popvar inc_pop_`yr'
	}
	gen `pcvar' = .
	foreach grp in H UM LM L {
		replace `pcvar' = `need_`grp'' * `infl'*1000000000/`popvar' if incomegroup2011 == "`grp'"
	}
	label var `pcvar' "Per capita annual needs for biodiversity `yr' pop, const. 2015 USD"
}

* Create country total needs
*  FIX: the 2015 and 2030 totals now use the matching year's per capita need
*  (previously the 2025 per capita figure was multiplied by 2015/2030 population)
gen biodiversity_needs = pc_biodiversity_needs * pop2025
 label var biodiversity_needs "Annual needs for biodiversity 2025, const. 2015 USD"

gen biodiversity_needs2015 = pc_biodiversity_needs2015 * pop2015
 label var biodiversity_needs2015 "Annual needs for biodiversity 2015, const. 2015 USD"

gen biodiversity_needs2030 = pc_biodiversity_needs2030 * pop2030
 label var biodiversity_needs2030 "Annual needs for biodiversity 2030, const. 2015 USD"

* Create income group totals (regrouped by 2015 income group)
*  FIX: labels are attached to the inc_ variables; they used to overwrite the
*  labels of the country-level variables above
egen inc_biodiversity_needs = sum(biodiversity_needs), by(incomegroup2015)
 label var inc_biodiversity_needs "Annual needs for biodiversity by 2015 income group, const. 2015 USD"

egen inc_biodiversity_needs2015 = sum(biodiversity_needs2015), by(incomegroup2015)
 label var inc_biodiversity_needs2015 "Annual needs for biodiversity 2015 by 2015 income group, const. 2015 USD"

egen inc_biodiversity_needs2030 = sum(biodiversity_needs2030), by(incomegroup2015)
 label var inc_biodiversity_needs2030 "Annual needs for biodiversity 2030 by 2015 income group, const. 2015 USD"

* Create new income group per capita average
*  FIX: labels are attached to the incpc_ variables, and the 2030 variable
*  gets its own 2030 label
bysort incomegroup2015: egen incpc_biodiversity_need = wtmean(pc_biodiversity_needs), weight(pop2025)
 label var incpc_biodiversity_need "Per capita annual needs for biodiversity by 2015 income group, const. 2015 USD"

bysort incomegroup2015: egen incpc_biodiversity_need2015 = wtmean(pc_biodiversity_needs2015), weight(pop2015)
 label var incpc_biodiversity_need2015 "Per capita annual needs for biodiversity 2015 by 2015 income group, const. 2015 USD"

bysort incomegroup2015: egen incpc_biodiversity_need2030 = wtmean(pc_biodiversity_needs2030), weight(pop2030)
 label var incpc_biodiversity_need2030 "Per capita annual needs for biodiversity 2030 by 2015 income group, const. 2015 USD"

* Store countrycode as str3, then drop high-income countries (2015 classification)
recast str3 countrycode
drop if incomegroup2015 == "H"

save "output\costing\biodiversity_needs_clean.dta", replace


********************************************************************************
********************************************************************************
*** 8. Justice
********************************************************************************
********************************************************************************

********************************************************************************
** A . Manuel et al. 2019
**		https://www.odi.org/sites/odi.org.uk/files/resource-documents/12702.pdf - Chapter 6, Table 1
**		Universal basic justice costing framework - main components ($ per person per year)
**		2013 USD
********************************************************************************

* Using only public components of Table 1, i.e. not using last item "Out-of-pocket expenses"
use "output\UN_memberstates.dta", clear
drop Region

* Income groups (2015 for aggregation, 2017 for assigning the per capita cost);
* observations found only in the income group file are discarded
merge 1:1 countrycode using "output\WBincomegroups_all.dta", keepusing(incomegroup2015 incomegroup2017)
drop if _merge == 2
drop _merge

* Region and population (all observations kept)
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(region pop2015 pop2025 pop2030 pop_avg1530)
drop _merge

* Create OECD dummy to assign per capita needs
*  The list is split over several statements because inlist() accepts only a
*  limited number of string arguments
gen oecd = (inlist(countryname, "Australia", "Austria", "Belgium", "Canada", "Chile", "Czech Republic", "Denmark", "Estonia", "Finland")) 
replace oecd = 1 if (inlist(countryname, "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Israel", "Italy", "Japan"))
replace oecd = 1 if (inlist(countryname, "Korea, Rep.", "Latvia", "Lithuania", "Luxembourg", "Mexico", "Netherlands", "New Zealand"))
replace oecd = 1 if (inlist(countryname, "Norway", "Poland", "Portugal", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland"))
replace oecd = 1 if (inlist(countryname, "Turkey", "United Kingdom", "United States"))

* Fill in values from Table 1, without Out of pocket expenses
*  Assigned by 2017 income group; the OECD value is applied last and therefore
*  overrides the income-group value for every OECD member
gen pc_justice_needs = 15 if incomegroup2017 == "L"
replace pc_justice_needs = 54 if inlist(incomegroup2017, "LM", "UM")
replace pc_justice_needs = 170 if incomegroup2017 == "H"
replace pc_justice_needs = 210 if oecd == 1

* Inflate to 2015 prices
replace pc_justice_needs = pc_justice_needs * 1.010688197560162
 label variable pc_justice_needs "Per capita justice public financing need using income groups, average (Const. USD 2015)"

* Calculate total financing needs by country
*  The same per capita cost is applied to each year's population
gen justice_needs = pc_justice_needs * pop2025
 label variable justice_needs "Justice public financing need using income groups, 2025 (Const. USD 2015)"

gen justice_needs2015 = pc_justice_needs * pop2015
 label variable justice_needs2015 "Justice public financing need using income groups, 2015 (Const. USD 2015)"

gen justice_needs2030 = pc_justice_needs * pop2030
 label variable justice_needs2030 "Justice public financing need using income groups, 2030 (Const. USD 2015)"
 
* Total funding need by income group
bysort incomegroup2015: egen incjustice_need = sum(justice_needs)
 label variable incjustice_need "Justice public financing need by income group, 2025 (Const. USD 2015)"

bysort incomegroup2015: egen incjustice_need2015 = sum(justice_needs2015)
 label variable incjustice_need2015 "Justice public financing need by income group, 2015 (Const. USD 2015)"

bysort incomegroup2015: egen incjustice_need2030 = sum(justice_needs2030)
 label variable incjustice_need2030 "Justice public financing need by income group, 2030 (Const. USD 2015)"
 
* Per capita funding need by income group
*  Same per capita cost, weighted by the population of the respective year
bysort incomegroup2015: egen incpc_justice_need = wtmean(pc_justice_needs), weight(pop2025)
 label variable incpc_justice_need "Per capita justice public financing need by income group, 2025 (Const. USD 2015)"

bysort incomegroup2015: egen incpc_justice_need2015 = wtmean(pc_justice_needs), weight(pop2015)
 label variable incpc_justice_need2015 "Per capita justice public financing need by income group, 2015 (Const. USD 2015)"

* FIX: this label used to be applied to incpc_justice_need2015 (with 2015
* text), leaving the 2030 variable unlabeled
bysort incomegroup2015: egen incpc_justice_need2030 = wtmean(pc_justice_needs), weight(pop2030)
 label variable incpc_justice_need2030 "Per capita justice public financing need by income group, 2030 (Const. USD 2015)"
 
* Fix the string storage types of the identifier variables
recast str3 countrycode
recast str30 countryname

drop if incomegroup2015 == "H"

save "output\costing\justice_needs_clean.dta", replace


********************************************************************************
********************************************************************************
*** 9. Merge all datasets together
********************************************************************************
********************************************************************************

* Each sector file is merged 1:1 on countrycode and all observations are kept.
* countryname (and incomegroup2015 where listed) is requested from every file
* so that countries appearing only in a later file still get a value.
* Repeated names in the variable lists have been removed.

** Agriculture
use "output\costing\ag_public_rural.dta", clear
keep countryname countrycode ag_need1530 pc_ag_need1530 incag_need1530 incpc_ag_need1530 pc_ag_need2015 ///
	ag_need2030 pc_ag_need2030 incag_need2030 incpc_ag_need2030 pop2015 

** Education
merge 1:1 countrycode using "output\costing\educ_needs_clean.dta", nogenerate ///
	keepusing(countryname educ_needs pc_educ_needs incpc_educ_needs pc_educ_needs2015 ///
	educ_needs2030 pc_educ_needs2030 incpc_educ_needs2030)

** Health
merge 1:1 countrycode using "output\costing\health_needs_clean.dta", nogenerate ///
	keepusing(incomegroup2015 countryname health_need2030 pc_health_need2030 pc_health_need2015 ///
	inchealth_need2030 incpc_health_need2030) 

** Social spending
merge 1:1 countrycode using "output\costing\pgtSS_needs_clean.dta", nogenerate ///
	keepusing(incomegroup2015 countryname pgtSS_need2015 pgtSS_need2025 pgtSS_need2030 ///
	pc_pgtSS_need2015 pc_pgtSS_need2025 incpgtSS_need2015 incpgtSS_need2025 ///
	incpc_pgtSS_need2015 incpc_pgtSS_need2025) 
 
** Infrastructure
*  FIX: incenergy_needs2030 is now kept alongside the flood and transport 2030
*  income-group totals (incpc_energy_needs2030 was listed twice in its place)
merge 1:1 countrycode using "output\costing\infrastructure_needs_clean.dta", nogenerate ///
	keepusing(countryname ///
	pc_energy_needs2025 pc_energy_needs2030 pc_flood_needs2025 pc_flood_needs2030 ///
	pc_transport_needs2025 pc_transport_needs2030 ///
	pc_energy_needs2015 pc_flood_needs2015 pc_transport_needs2015 ///
	energy_needs2025 flood_needs2025 transport_needs2025 ///
	energy_needs2030 flood_needs2030 transport_needs2030 ///
	incenergy_needs2025 incflood_needs2025 inctransport_needs2025 ///
	incenergy_needs2030 incflood_needs2030 inctransport_needs2030 ///
	incpc_energy_needs2025 incpc_flood_needs2025 incpc_transport_needs2025 ///
	incpc_energy_needs2030 incpc_flood_needs2030 incpc_transport_needs2030)

** WASH
merge 1:1 countrycode using "output\costing\WASH_needs_clean.dta", nogenerate ///
	keepusing(countryname wash_need2025 pc_wash_need2025 wash_need2030 pc_wash_need2030 pc_wash_need2015 ///
	incwash_need2025 incpc_wash_need2025 incwash_need2030 incpc_wash_need2030)

** Conservation
merge 1:1 countrycode using "output\costing\biodiversity_needs_clean.dta", nogenerate ///
	keepusing(countryname biodiversity_needs pc_biodiversity_needs pc_biodiversity_needs2015 ///
	incpc_biodiversity_need biodiversity_needs2030 pc_biodiversity_needs2030 incpc_biodiversity_need2030)
 
** Justice
merge 1:1 countrycode using "output\costing\justice_needs_clean.dta", nogenerate ///
	keepusing(countryname justice_needs pc_justice_needs incjustice_need incpc_justice_need ///
	justice_needs2030 incjustice_need2030 incpc_justice_need2030)

* Keep only low and middle income countries as of 2015
drop if incomegroup2015 == "H"

********************************************************************************  
* A . Total and per capita country need
********************************************************************************  

* 2025
*  Sum of the sector needs. Agriculture uses the 2015-30 figure and health the
*  2030 figure ("2025 where possible"). A country with any missing component
*  gets a missing total.
gen pc_total_need = pc_ag_need1530 + pc_educ_needs + pc_health_need2030 + pc_pgtSS_need2025 + pc_energy_needs2025 + pc_flood_needs2025 + pc_transport_needs2025 + pc_wash_need2025 + pc_biodiversity_needs + pc_justice_needs
 label var pc_total_need "Total per capita needs, 2025 where possible"

gen total_need = ag_need1530 + educ_needs + health_need2030 + pgtSS_need2025 + energy_needs2025 + flood_needs2025 + transport_needs2025 + wash_need2025 + biodiversity_needs + justice_needs
 label var total_need "Total needs, 2025 where possible"

 
* 2015
*  FIX: the label is now attached to pc_total_need2015; it used to overwrite
*  the label of the 2025 variable pc_total_need
gen pc_total_need2015 = pc_ag_need2015 + pc_educ_needs2015 + pc_health_need2015 + pc_pgtSS_need2015 + pc_energy_needs2015 + pc_flood_needs2015 + pc_transport_needs2015 + pc_wash_need2015 + pc_biodiversity_needs2015 + pc_justice_needs
 label var pc_total_need2015 "Total per capita needs, 2015"
 
* 2015 total = sum of the 2015 per capita needs x 2015 population
gen total_need2015 = (pc_ag_need2015 + pc_educ_needs2015 + pc_health_need2015 + pc_pgtSS_need2015 + pc_energy_needs2015 + pc_flood_needs2015 + pc_transport_needs2015 + pc_wash_need2015 + pc_biodiversity_needs2015 + pc_justice_needs)*pop2015
label var total_need2015 "Total needs, 2015 where possible"


* 2015 total by sector: total_<per capita variable> = per capita need x 2015 population
foreach var of varlist pc_ag_need2015 pc_educ_needs2015 pc_health_need2015 pc_pgtSS_need2015 pc_energy_needs2015 pc_flood_needs2015 pc_transport_needs2015 pc_wash_need2015 pc_biodiversity_needs2015 pc_justice_needs {
gen total_`var'=`var'*pop2015
}

********************************************************************************  
* B . 2025 needs by income group
******************************************************************************** 
 
* Income-group per capita total: sum of the sector income-group per capita needs
gen incpc_total_need = incpc_ag_need1530 + incpc_educ_needs + incpc_health_need2030 + incpc_pgtSS_need2025 + incpc_energy_needs2025 + incpc_flood_needs2025 + incpc_transport_needs2025 + incpc_wash_need2025 + incpc_biodiversity_need + incpc_justice_need

********************************************************************************  
* C . % of GDP country needs
******************************************************************************** 

* Merge with GDP data
*  Observations found only in the GDP/population files are discarded
merge 1:1 countrycode using "output\merged_full_totals_cons.dta", keepusing(pc_cntytolgdp_2015 pc_cntytolgdp_2030 gdp_traj2030 pc_gdp_traj2025 gdp_traj_avg1530 gdp_traj2015 pop2015 pop2025 pop2030 pop_avg1530 count2015) keep(master match) nogenerate

merge 1:1 countrycode using "output\gdp_cons2030wide.dta", keepusing(gdp_traj2025) keep(master match) nogenerate

* GDP per capita over 2015-2030: average GDP divided by average population
gen pc_gdp_traj1530 = gdp_traj_avg1530 / pop_avg1530
 label variable pc_gdp_traj1530 "GDP per capita, avg. 2015-2030 (USD constant 2015)" 
 
* Natural logs (l_ prefix) of total per capita need and per capita current spending
gen lpc_total_need = ln(pc_total_need)
 label variable lpc_total_need "Total need, log p.c., 2025 where possible (USD constant 2015)"

gen lpc_cntytolgdp_2030 = ln(pc_cntytolgdp_2030)
  label variable lpc_cntytolgdp_2030 "Total current spending, log p.c. 2030"

* Each sector need and the total as a share of 2025 GDP (<variable>_shGDP)
foreach need of varlist ag_need1530 educ_needs health_need2030 pgtSS_need2025 energy_needs2025 flood_needs2025 transport_needs2025 wash_need2025 biodiversity_needs justice_needs total_need {
	gen `need'_shGDP=`need' /gdp_traj2025
}

save "output/needs_combine_full_cons.dta", replace